---
title: "Cleaning World Values Survey"
---

# Load

```{r}
# attach the packages used throughout this document (sourced from a helper script)
source(file = "helper-packages.R")

# read the integrated WVS time-series file [in reality many problems with this]
wvs_raw <- readRDS(
  file = "../raw-data/y-multi-world-values-survey/integrated-data-2022/WVS_TimeSeries_1981_2022_Rds_v3_0.rds"
)

# read the fieldwork dates that were transcribed from the survey reports
fielddates_raw <- import(
  "../raw-data/y-multi-world-values-survey/fieldwork-dates/wvs_missing_fieldwork_dates.xlsx"
)

# read the manually coded religion recodes
rel_recodes_raw <- import(
  "../raw-data/y-multi-world-values-survey/religion-recodes/wvs_religions_recodes.csv"
)
```

# Clean fieldwork dates

* Note: the 2022 version of the integrated dataset has its own fieldwork start/end date columns (by country/wave), but we have scraped these dates from the original study documents by hand and have found our version to be far more accurate.

* Note: some country-rounds have obviously incorrect dates:
  - End dates before start dates
  - Full survey rounds completed in a few days
  - Full survey rounds completed over multiple years

Obvious data entry errors are dropped from the dates dataframe; reasons are given in the Excel dates file.
Note that not all date ranges were transcribed; only those country/rounds for which a social distance question was asked.

```{r}
# Clean the hand-transcribed fieldwork dates:
#   - keep rows with no flagged uncertainty and with both dates present
#   - parse the "%d %b %Y" date strings into Date columns
#   - harmonise country names with countryname()
# NOTE(review): "%b" month abbreviations are locale-dependent, and this assumes
#   the Excel date columns are imported as text -- confirm.
  fielddates_clean <-
    fielddates_raw %>% 
    filter(
      is.na(`uncertainty?`),
      !is.na(interview_start_date),
      !is.na(interview_end_date)) %>% 
    mutate(
      resp_interview_start_date = as.Date(interview_start_date, "%d %b %Y"),
      resp_interview_end_date = as.Date(interview_end_date, "%d %b %Y"),
      resp_country_common = countryname(resp_country_common)) %>% 
    select(
      resp_country_common,
      resp_round,
      resp_interview_start_date,
      resp_interview_end_date) %>% 
    arrange(resp_round, resp_country_common)

# declare additional dates (reasoning and issues given in text below)
# dates are typed as ISO strings here and converted to Date after the table
  additional_imputed_dates <- 
    tribble(
      ~resp_country_common, ~resp_round, ~resp_interview_start_date, ~resp_interview_end_date,
      
      # wave 1
      "Argentina", "Wave 1", "1984-01-01", "1984-12-31", # technical doc gives 1st of year
      "Australia", "Wave 1", "1981-01-01", "1981-12-31", # technical doc gives 1st of year
      "Canada", "Wave 1", "1981-01-01", "1984-12-31", # no mention in technical doc so input full wave 1 range
      "Hungary", "Wave 1", "1982-01-01", "1982-12-31", # technical doc gives 1st of year
      "Japan", "Wave 1", "1981-01-01", "1981-12-31", # technical doc gives 1st of year
      "Mexico", "Wave 1", "1981-01-01", "1981-12-31", # technical doc gives 1st of year
      "South Africa", "Wave 1", "1982-01-01", "1982-12-31", # technical doc gives 1st of year
      "South Korea", "Wave 1", "1982-01-01", "1982-12-31", # technical doc gives 1st of year
      "Sweden", "Wave 1", "1981-01-01", "1981-12-31", # technical doc gives 1st of year
      "US", "Wave 1", "1981-01-01", "1981-12-31", # technical doc gives 1st of year
      
      # wave 2
      "Canada", "Wave 2", "1988-12-01", "1992-01-31", # no mention in technical doc so input full wave 2 range (from observed data; note that website gives end as 1994 but no surveys past 1992-01-31 in data)
      "Sweden", "Wave 2", "1988-12-01", "1992-01-31", # no mention in technical doc so input full wave 2 range (from observed data; note that website gives end as 1994 but no surveys past 1992-01-31 in data)
      "US", "Wave 2", "1988-12-01", "1992-01-31", # no mention in technical doc so input full wave 2 range (from observed data; note that website gives end as 1994 but no surveys past 1992-01-31 in data)
      
      # wave 3
      "India", "Wave 3", "1995-01-01", "1995-12-31", # in the tech doc wave 4 date range is given erroneously; 1995 given on website for india https://www.worldvaluessurvey.org/WVSDocumentationWV3.jsp
      "South Korea", "Wave 3", "1996-01-01", "1996-12-31", # in the tech doc wave 4 date range is given erroneously; 1996 given on website for south korea
      "Australia","Wave 3", "1995-01-01", "1995-12-31", # 1995 given on website  
      "Czechia","Wave 3", "1998-01-01", "1998-12-31", # 1998 given on website    
      "Finland","Wave 3", "1996-01-01", "1996-12-31", # 1996 given on website  
      "Japan","Wave 3", "1995-01-01", "1995-12-31", # 1995 given on website
      "Taiwan", "Wave 3", "1998-01-01", "1998-12-31", # 1998 given on website
      "Chile", "Wave 3", "1996-01-01", "1996-12-31", # 1996 given on website
      
      # wave 4
      "Moldova", "Wave 4", "2002-01-01", "2002-12-31", # 2002 given on website 
      
      # wave 5
      "Guatemala", "Wave 5", "2004-01-01", "2004-12-31", # 2004 given on website 
      
      # wave 6
      "Hong Kong", "Wave 6", "2014-01-01", "2014-12-31", # 2014 given on website 
      "India", "Wave 6", "2012-01-01", "2012-12-31") %>% # 2012 given on website 
    mutate(
      resp_interview_start_date = as.Date(resp_interview_start_date),
      resp_interview_end_date = as.Date(resp_interview_end_date),
      resp_country_common = countryname(resp_country_common))
  
# stack the transcribed and the imputed date ranges
# NOTE(review): the main cleaning chunk left-joins this table by
#   resp_country_common + resp_round; a country/round present in both inputs
#   would duplicate respondents there -- confirm the two sets do not overlap.
  fielddates_w_extras <- 
    fielddates_clean %>% 
    bind_rows(additional_imputed_dates)
```

World Values Survey; final, missing dates, reasons:

* Wave 1, Argentina, 1005
  - Technical document gives 01-01-1984 - 01-01-1984; implausible.
* Wave 1, Australia, 1228
  - Technical document gives 01-01-1981 - 01-01-1981; implausible.
* Wave 1, Canada, 1254
  - No mention in Wave 1 technical document
* Wave 1, Hungary, 1464
  - Technical document gives 01-01-1982 - 31-12-1982; implausible.
* Wave 1, Japan, 1204
  - Technical document gives 01-01-1981 - 01-01-1981; implausible.
* Wave 1, Mexico, 1837
  - Technical document gives 01-01-1981 - 01-01-1981; implausible.
* Wave 1, South Africa, 1596
  - Technical document gives 01-01-1982 - 01-01-1982; implausible.
* Wave 1, South Korea, 970
  - Technical document gives 01-01-1982 - 01-01-1982; implausible.
* Wave 1, Sweden, 954
  - Technical document gives 01-01-1981 - 01-01-1981; implausible.
* Wave 1, US, 2325
  - Technical document gives 01-01-1981 - 01-01-1981; implausible.
  
* Wave 2, Canada, 1730
  - No mention in Wave 2 technical document  
* Wave 2, Sweden, 1047
  - No mention in Wave 2 technical document 
* Wave 2, US, 1839
  - No mention in Wave 2 technical document 
  
* Wave 3, Australia, 2048
  - Technical document gives 01-01-2000 - 31-12-2004; implausible.
* Wave 3, Chile, 1000
  - Technical document gives 21-06-1996 - 21-06-1996; implausible.
* Wave 3, Czechia, 1147
  - No mention in Wave 3 technical document 
* Wave 3, Finland, 987
  - Technical document gives 21-04-2000 - 21-06-1996; implausible.
* Wave 3, Japan, 1054
  - No mention in Wave 3 technical document 
* Wave 3, Taiwan, 780
  - No mention in Wave 3 technical document

* Wave 4, Moldova, 1008
  - Technical document gives 01-01-2000 - 31-12-2004; implausible.
  
* Wave 5, Guatemala, 1000
  - No mention in Wave 5 technical document
  
* Wave 6, Hong Kong, 1000
  - No mention in Wave 6 technical document
* Wave 6, India, 4078
  - No mention in Wave 6 technical document

# Clean data on respondent religion

There are a large number of inconsistencies in the original data, seen when comparing F025 (major religious groups) and F025_WVS (all denominations -- this column appears to reflect the codings in the original country-level datasets). I take all unique combinations of F025/F025_WVS in the original data and create a new variable for the major groups. This defers to F025_WVS unless F025 provides more information than F025_WVS. I also take clues from the country where the combination appears, and from the average religiosity of those in that combination-group.

```{r}
# take unique combinations of major religious groups (F025) and denominations
# (F025_WVS) from the raw wvs data; for each combination report the number of
# respondents, the countries where it occurs, and the share of respondents
# describing themselves as "a religious person" (F034 == 1 vs. 2/3)
  wvs_religion_toclean <- 
    wvs_raw %>%
    mutate(
      # replace the labelled codes by their text labels
      F025 = to_character(F025),
      F025_WVS = to_character(F025_WVS),
      # 1 = religious; 0 = not religious / convinced atheist; NA otherwise
      resp_religiosity_recode = 
          case_when(
            F034 == 1 ~ 1,
            F034 %in% c(2, 3) ~ 0,
            TRUE ~ NA_real_
          )) %>% 
    group_by(F025, F025_WVS) %>% 
    summarise(
      n = n(),
      countries = paste(unique(COUNTRY_ALPHA), collapse = "; "),
      # spelled-out TRUE: `T` is an ordinary variable that can be reassigned
      mean_resp_religiosity_recode = mean(resp_religiosity_recode, na.rm = TRUE)) %>% 
    ungroup() %>% 
    select(countries, mean_resp_religiosity_recode, n, F025_WVS, F025)

# write to csv for manual work (kept commented out so the file is not overwritten on re-runs)
#  write.csv(wvs_religion_toclean, "wvs_religions_recodes_new.csv", row.names = F)
```
Now clean the dataframe that contains the recodes manually inputted by Jiayi Li and Gareth Nellis:

```{r}
# tidy the manual recode table: drop combinations that were left uncoded, keep
# only the two join keys plus the recode, and turn the literal placeholder text
# "NA_character_" into a real missing value
  rel_recodes_clean <- 
    rel_recodes_raw %>% 
    filter(resp_religion != "") %>% 
    transmute(
      F025_WVS,
      F025,
      resp_religion = 
        if_else(
          resp_religion == "NA_character_",
          NA_character_,
          resp_religion))
```

# Fix religion coding error in Albania

# Clean main WVS data

```{r}
# Build the cleaned respondent-level WVS data from the raw integrated file:
#   1. add survey meta-data (source, round, mode, country)
#   2. drop respondents from northern Cyprus
#   3. attach fieldwork date ranges (fielddates_w_extras) by country/round
#   4. attach the manual religion recodes (rel_recodes_clean) by F025_WVS/F025
#   5. derive demographics, the eight social-distance items, general trust
#      and religiosity
#   6. keep only the harmonised columns (those prefixed "resp_")
  wvs_clean <- 
    wvs_raw %>% 
    mutate(
      
    #########################  
    ####### META-DATA #######  
    #########################      
      
      # source name (character vector, title case)
        resp_source = "World Values Survey",
        
      # round number (character vector, title case)  
        resp_round =
          case_when(
            S002VS == 1 ~ "Wave 1",
            S002VS == 2 ~ "Wave 2",
            S002VS == 3 ~ "Wave 3",
            S002VS == 4 ~ "Wave 4",
            S002VS == 5 ~ "Wave 5",
            S002VS == 6 ~ "Wave 6",
            S002VS == 7 ~ "Wave 7"),      
      
      # url to dataset source, where publicly available (character vector)
        resp_original_data_url = "bit.ly/3GuX5sZ",

      # survey mode (in-person/phone/internet/mail)
        resp_survey_mode =
          case_when(
            mode %in% c(1, 2, 6) ~ "in-person",
            mode == 3 ~ "internet",
            mode == 4 ~ "mail",
            mode == 5 ~ "phone",
            TRUE ~ NA_character_), 

      # country (character vector; list of countries as written in original source)
        resp_country_original = to_character(S003),

      # country (character vector; converts to countrycode country.name list)
        resp_country_common = 
          countryname(resp_country_original)) %>% 
    
      # remove observations from northern cyprus
      filter(!X048ISO %in% c(196004, 196006)) %>%  # Famagusta, Kyrenia (northern Cyprus)

      mutate(
      # interview date (variable of class Date, parsed from the YYYYMMDD code in S012)
      # NOTE(review): values that are not a complete YYYYMMDD date presumably parse to NA -- confirm
        resp_interview_date = as.Date(as.character(S012), format = "%Y%m%d")) %>% 
  
      # interview start and end date (for surveys where we have only ranges)
      left_join(
        fielddates_w_extras, by = c("resp_country_common", "resp_round")) %>% 
      mutate(
   
    #########################  
    ##### DEMOGRAPHICS ######  
    #########################
      
      # respondent's religion (character vector that corresponds to master list)
        # note: there are clear errors and contradictions in the original recode variable (F025), so we redid this manually using information in both F025 and F025_WVS, see above
        # the two label columns are converted to text so they can serve as join keys
        F025_WVS = to_character(F025_WVS),
        F025 = to_character(F025)) %>% 
      left_join(
        rel_recodes_clean, by = c("F025_WVS", "F025")) %>% # merge checked; clean
      mutate(

      # respondent's denomination (character vector)
        resp_denomination =
          to_character(F025_WVS),
        resp_denomination = 
          case_when(
            resp_denomination == "Protestant; nfd" ~ "Protestant",
            TRUE ~ resp_denomination),

      # respondent's age (character vector; bins denoted by single dash ["18-25"])
      # negative codes (-1 to -5) become NA; every other value is kept as text
        resp_age = 
          dplyr::recode(
            as.character(X003),
            "-1" = NA_character_,
            "-2" = NA_character_,
            "-3" = NA_character_,
            "-4" = NA_character_,
            "-5" = NA_character_,
            .default = as.character(X003)),        
      
      # respondent's education level
      # codes 1-8 get the labels below; any other code becomes NA
        resp_education_original =
          dplyr::recode(
            as.character(X025), # significant missingness here but this is evident in the country-specific variable too (indeed, number of missings match exactly); X025R (recoded) is insufficiently detailed to distinguish
              "1" = "1. Inadequately completed elementary education [No education]",
              "2" = "2. Completed (compulsory) elementary education [Primary]",
              "3" = "3. Incomplete secondary school: technical/vocational type/(Compulsory) elementary education and basic vocational qualification [Primary]",
              "4" = "4. Complete secondary school: technical/vocational type/Secondary, intermediate vocational qualification [Primary]",
              "5" = "5. Incomplete secondary: university-preparatory type/Secondary, intermediate general qualification [Primary]",
              "6" = "6. Complete secondary: university-preparatory type/Full secondary, maturity level certificate [Primary]",
              "7" = "7. Some university without degree/Higher education - lower-level tertiary certificate [Primary]",
              "8" = "8. University with degree/Higher education - upper-level tertiary certificate [College]",
            .default = NA_character_),       

      # respondent's gender (numeric: female = 1; male = 0; other = NA)
        resp_female = 
          case_when(
            X001 == 1 ~ 0,
            X001 == 2 ~ 1,
            TRUE ~ NA_real_),
      
      # respondent resident in rural (vs urban) area (numeric: rural = 1; urban/semi-urban/peri-urban = 0)
        resp_rural =
          case_when(
            X049 %in% c(1:3) ~ 1, # here defining rural as towns of 10k people or under; only comprehensive measure (X050C, Urban/Rural habitat, has much higher missingness)
            X049 %in% c(4:8) ~ 0,
            TRUE ~ NA_real_),      

    #########################  
    ### SOCIAL DISTANCE 1 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_1_qinfo = "NUM: A124.05; QTEXT: On this list are various groups of people. Could you identify any that you would not like to have as neighbours? Muslims.; ROPTIONS: 0 = Not mentioned [=0] + 1 = Mentioned [=1]; TARGET: Muslim; TYPE: Distance, neighbor",
      
      # original response (as character vector)
      # listed non-substantive codes become NA; unlisted values are kept as text
      # ("-5" included for consistency with the A124_10 list below)
        resp_soc_dist_1_original = 
          dplyr::recode(
            as.character(A124_05),
            "-10" = NA_character_,
            "-9" = NA_character_,
            "-8" = NA_character_,
            "-7" = NA_character_,
            "-6" = NA_character_,
            "-5" = NA_character_,
            "-4" = NA_character_,
            "-2" = NA_character_,
            "-1" = NA_character_
            ),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_1_bin_recode = 
          case_when(
            A124_05 %in% c(0) ~ 0,
            A124_05 %in% c(1) ~ 1,
            TRUE ~ NA_real_),

    #########################  
    ### SOCIAL DISTANCE 2 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_2_qinfo = "NUM: A124.10; QTEXT: On this list are various groups of people. Could you identify any that you would not like to have as neighbours? Jews.; ROPTIONS: 0 = Not mentioned [=0] + 1 = Mentioned [=1]; TARGET: Jewish; TYPE: Distance, neighbor",
      
      # original response (as character vector)
        resp_soc_dist_2_original = 
          dplyr::recode(
            as.character(A124_10),
            "-10" = NA_character_,
            "-9" = NA_character_,
            "-8" = NA_character_,
            "-7" = NA_character_,
            "-6" = NA_character_,
            "-5" = NA_character_,
            "-4" = NA_character_,
            "-2" = NA_character_,
            "-1" = NA_character_
            ),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_2_bin_recode = 
          case_when(
            A124_10 %in% c(0) ~ 0,
            A124_10 %in% c(1) ~ 1,
            TRUE ~ NA_real_),

    #########################  
    ### SOCIAL DISTANCE 3 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_3_qinfo = "NUM: A124.12; QTEXT: On this list are various groups of people. Could you identify any that you would not like to have as neighbours? People of a different religion.; ROPTIONS: 0 = Not mentioned [=0] + 1 = Mentioned [=1]; TARGET: Different religion; TYPE: Distance, neighbor",
      
      # original response (as character vector)
        resp_soc_dist_3_original = 
          dplyr::recode(
            as.character(A124_12),
            "-4" = NA_character_,
            "-2" = NA_character_,
            "-1" = NA_character_
            ),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_3_bin_recode = 
          case_when(
            A124_12 %in% c(0) ~ 0,
            A124_12 %in% c(1) ~ 1,
            TRUE ~ NA_real_),
    
    #########################  
    ### SOCIAL DISTANCE 4 ###  
    #########################
    
    ## NOTE: this question is only asked in Puerto Rico in wave 4; confirm "Protestant" is recorded correctly in resp_denomination; CONFIRMED   
    
      # original question number; question text; response options (input above)
        resp_soc_dist_4_qinfo = "NUM: A124.23; QTEXT: On this list are various groups of people. Could you identify any that you would not like to have as neighbours? Protestants.; ROPTIONS: 0 = Not mentioned [=0] + 1 = Mentioned [=1]; TARGET: Protestant; TYPE: Distance, neighbor",
      
      # original response (as character vector)
        resp_soc_dist_4_original = 
          dplyr::recode(
            as.character(A124_23),
            "-4" = NA_character_,
            "-2" = NA_character_,
            "-1" = NA_character_
            ),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_4_bin_recode = 
          case_when(
            A124_23 %in% c(0) ~ 0,
            A124_23 %in% c(1) ~ 1,
            TRUE ~ NA_real_), 
    
    #########################  
    ### SOCIAL DISTANCE 5 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_5_qinfo = "NUM: A124.24; QTEXT: On this list are various groups of people. Could you identify any that you would not like to have as neighbours? Christians.; ROPTIONS: 0 = Not mentioned [=0] + 1 = Mentioned [=1]; TARGET: Christian; TYPE: Distance, neighbor",
      
      # original response (as character vector)
      # ("-5" included for consistency with the A124_10 list above)
        resp_soc_dist_5_original = 
          dplyr::recode(
            as.character(A124_24),
            "-10" = NA_character_,
            "-9" = NA_character_,
            "-8" = NA_character_,
            "-7" = NA_character_,
            "-6" = NA_character_,
            "-5" = NA_character_,
            "-4" = NA_character_,
            "-2" = NA_character_,
            "-1" = NA_character_
            ),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_5_bin_recode = 
          case_when(
            A124_24 %in% c(0) ~ 0,
            A124_24 %in% c(1) ~ 1,
            TRUE ~ NA_real_),   
    
    #########################  
    ### SOCIAL DISTANCE 6 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_6_qinfo = "NUM: A124.29; QTEXT: On this list are various groups of people. Could you identify any that you would not like to have as neighbours? Hindus.; ROPTIONS: 0 = Not mentioned [=0] + 1 = Mentioned [=1]; TARGET: Hindu; TYPE: Distance, neighbor",
      
      # original response (as character vector)
        resp_soc_dist_6_original = 
          dplyr::recode(
            as.character(A124_29),
            "-4" = NA_character_,
            "-2" = NA_character_,
            "-1" = NA_character_
            ),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_6_bin_recode = 
          case_when(
            A124_29 %in% c(0) ~ 0,
            A124_29 %in% c(1) ~ 1,
            TRUE ~ NA_real_),
    
    #########################  
    ### SOCIAL DISTANCE 7 ###  
    #########################
    
    ## NOTE: this question is only asked in Iraq in waves 4 and 5; confirm "Sunni" is recorded correctly in resp_denomination; CHECKED
    
      # original question number; question text; response options (input above)
        resp_soc_dist_7_qinfo = "NUM: A124.45; QTEXT: On this list are various groups of people. Could you identify any that you would not like to have as neighbours? Sunnis.; ROPTIONS: 0 = Not mentioned [=0] + 1 = Mentioned [=1]; TARGET: Sunni; TYPE: Distance, neighbor",
      
      # original response (as character vector)
        resp_soc_dist_7_original = 
          dplyr::recode(
            as.character(A124_45),
            "-4" = NA_character_,
            "-2" = NA_character_,
            "-1" = NA_character_
            ),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_7_bin_recode = 
          case_when(
            A124_45 %in% c(0) ~ 0,
            A124_45 %in% c(1) ~ 1,
            TRUE ~ NA_real_),  
    
    #########################  
    ### SOCIAL DISTANCE 8 ###  
    #########################
    
    ## NOTE: this question is only asked in Iraq in waves 4 and 5; confirm "Shia" is recorded correctly in resp_denomination; CHECKED    
    
      # original question number; question text; response options (input above)
        resp_soc_dist_8_qinfo = "NUM: A124.46; QTEXT: On this list are various groups of people. Could you identify any that you would not like to have as neighbours? Shia.; ROPTIONS: 0 = Not mentioned [=0] + 1 = Mentioned [=1]; TARGET: Shia; TYPE: Distance, neighbor",
      
      # original response (as character vector)
        resp_soc_dist_8_original = 
          dplyr::recode(
            as.character(A124_46),
            "-4" = NA_character_,
            "-2" = NA_character_,
            "-1" = NA_character_
            ),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_8_bin_recode = 
          case_when(
            A124_46 %in% c(0) ~ 0,
            A124_46 %in% c(1) ~ 1,
            TRUE ~ NA_real_),
    
    ############################  
    ### GENERAL SOCIAL TRUST ###  
    ############################
    
      # original question number; question text; response options (input above)
        resp_gentrust_qinfo = "NUM: A165; QTEXT: Generally speaking, would you say that most people can be trusted or that you need to be very careful in dealing with people?; ROPTIONS: 1 = Most people can be trusted [=0] + 2 = Need to be very careful [=1]",

      # original response (as character vector)
        resp_gentrust_original = 
          dplyr::recode(
            as.character(A165),
            "-5" = NA_character_,
            "-4" = NA_character_,
            "-2" = NA_character_,
            "-1" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_gentrust_bin_recode = 
          case_when(
            A165 == 1 ~ 0,
            A165 == 2 ~ 1,
            TRUE ~ NA_real_),    
    
    #########################  
    ##### RELIGIOSITY #######  
    #########################
    
      # original question number; question text; response options (input above)
        resp_religiosity_qinfo = "NUM: F034; QTEXT: Independently of whether you go to church or not, would you say you are...; ROPTIONS: 1 = A religious person + 2 = Not a religious person + 3 = A convinced atheist",
  
      # original response (as numeric vector, with non-substantive responses coded as NA_real_)
        resp_religiosity_original = 
          dplyr::recode(
            as.numeric(F034),
            `-5` = NA_real_,
            `-4` = NA_real_,
            `-2` = NA_real_,
            `-1` = NA_real_),       

      # recode (numeric: scaled 0-1, where 1 is more religious)
        resp_religiosity_recode = 
          case_when(
            F034 == 1 ~ 1,
            F034 %in% c(2, 3) ~ 0,
            TRUE ~ NA_real_)
    
    ) %>% 
    # keep only the harmonised output columns
    select(starts_with("resp_"))
```

There is a labels error in Albania for 2004. This was discovered by downloading the DTA version of the raw data from this source:
https://www.worldvaluessurvey.org/WVSDocumentationWV4.jsp

I tabulated the religion variable there, v184b, and found the following distribution:

        Eastern Orthodox; nfd                Evangelicalism                    Islam; nfd                     No answer 
                          180                            13                           580                             4 
                  Noneligious                    Other; nfd Roman Catholic; Latin Church; 
                          129                             6                            88 
                          
These proportions appear correct -- Albania is a Muslim-majority country -- and I make the appropriate fixes to the denomination and religion variables.                          

```{r}
# fix errors: relabel denominations for Albania, Wave 4, where the labels in the
# integrated file are wrong, then re-derive the major religion from the
# corrected denomination. All other rows pass through unchanged.
  wvs_clean_albania_w4_fixed <-
    wvs_clean %>%  
    mutate(
      
      # flag the affected country-round once (temporary column, dropped below)
      is_albania_w4 = resp_round == "Wave 4" & resp_country_common == "Albania",
      
      # left: label found in the integrated data; right: label taken from the
      # tabulation of the country-level file
      # NOTE(review): "Noneligious" reproduces the spelling in that tabulation;
      #   confirm whether it should be harmonised with "Non-religious"
      resp_denomination = 
        case_when(
          is_albania_w4 & resp_denomination == "Buddhist" ~ "Other; nfd",
          is_albania_w4 & resp_denomination == "Hindu" ~ "Evangelicalism",
          is_albania_w4 & resp_denomination == "Judaism" ~ "Roman Catholic; Latin Church;",
          is_albania_w4 & resp_denomination == "No answer" ~ "No answer",
          is_albania_w4 & resp_denomination == "Non-religious" ~ "Noneligious",
          is_albania_w4 & resp_denomination == "Protestant" ~ "Eastern Orthodox; nfd",
          is_albania_w4 & resp_denomination == "Roman Catholic; Latin Church;" ~ "Islam; nfd",
          TRUE ~ resp_denomination),
      
      # mutate() evaluates sequentially, so resp_denomination below already
      # holds the corrected labels assigned just above
      resp_religion =
        case_when(
          is_albania_w4 & resp_denomination == "Other; nfd" ~ "Other religion",
          is_albania_w4 & resp_denomination == "Evangelicalism" ~ "Christian", # was misspelled "Chistian"
          is_albania_w4 & resp_denomination == "Roman Catholic; Latin Church;" ~ "Christian",
          is_albania_w4 & resp_denomination == "No answer" ~ NA_character_,
          is_albania_w4 & resp_denomination == "Noneligious" ~ NA_character_,
          is_albania_w4 & resp_denomination == "Eastern Orthodox; nfd" ~ "Christian",
          is_albania_w4 & resp_denomination == "Islam; nfd" ~ "Muslim",
          TRUE ~ resp_religion)) %>% 
    select(-is_albania_w4)
```

# Save data

```{r}
  # write the cleaned data (including the Albania wave 4 fix) to the cleaned-data folder
  saveRDS(
    object = wvs_clean_albania_w4_fixed,
    file = "../cleaned-data/y-5-multi-world-values-survey.rds"
  )
```